# bat occurance data
bat_occ <- rast(here("data","aggregated_data", "2017_aggregate_2017.tif"))
# roost location
bat_roosts <- st_read(here("data", "ca_bat_roosts", "ca_colonies.shp")) %>% st_make_valid()
## Reading layer `ca_colonies' from data source
## `/Users/annaboser/Documents/GitHub/larsen-lab-bats/data/ca_bat_roosts/ca_colonies.shp'
## using driver `ESRI Shapefile'
## Simple feature collection with 8 features and 5 fields
## Geometry type: POINT
## Dimension: XY
## Bounding box: xmin: -121.9357 ymin: 38.24866 xmax: -121.2264 ymax: 38.83952
## Geodetic CRS: WGS 84
# turn bat occ data into a dataframe
bat_occ_df <- as.data.frame(bat_occ, xy=TRUE)
names(bat_occ_df) <- c("x", "y", "bats")
# x locations of roosts
x_roosts <- bat_roosts$xcoord
# y locations of roosts
y_roosts <- bat_roosts$ycoord
# plot out the raw bat data
ggplot() +
geom_raster(data = bat_occ_df, aes(x=x, y=y, fill=bats)) +
geom_point(data = bat_roosts, aes(x=xcoord, y=ycoord))
spline <- lm(formula = bats ~ bs(x, degree = 3, knots = x_roosts)*bs(y, degree = 3, knots = y_roosts) , data = bat_occ_df)
bat_occ_df$spline <- spline$fitted.values
ggplot() +
geom_raster(data = bat_occ_df, aes(x=x, y=y, fill=spline)) +
geom_point(data = bat_roosts, aes(x=xcoord, y=ycoord))
That did not work. Second trial: plot out the spatial decay for each of the 8 roosts
# Make a function that only keeps points that are within r from a point
spot_filter <- function(d, x_center, y_center, r){
# first do a rough approximation
d <- filter(d, x > x_center-r, x < x_center+r, y > y_center-r, y < y_center+r)
# then go over and get rid of the corners
d$dist = sqrt((d$x - x_center)^2+(d$y - y_center)^2)
d <- d[d$dist<r,]
return(d)
}
d <- spot_filter(d=bat_occ_df, x_center=x_roosts[1], y_center=y_roosts[1], r=0.05)
ggplot(d) +
geom_point(aes(x=dist, y=bats))
# fit some exponential
fit <- lm(log(bats) ~ log(dist), data = d)
ggplot(d) +
geom_point(aes(x=dist, y=bats)) +
geom_line(aes(x=dist, y=exp(fit$fitted.values)), color = "red")
# fit some exponential
fit <- glm(bats/max(bats) ~ dist, family = "binomial", data = d)
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
ggplot(d) +
geom_point(aes(x=dist, y=bats)) +
geom_line(aes(x=dist, y=fit$fitted.values*max(bats)), color = "red")
I thing the logistic regression wins. Neither is perfect though… maybe there’s a better function out there. Now I need to code it up so that 1: I can get the logistic regressions for all 8 roosts 2: I can combine all of the regressions on the 3d plane
To combine all of the regressions, I think that I’ll 1: separately calculate the predictions over the entire study area for each regression 2: add them together 3: subtract the result from the observed bats
Ok full step by step: 1: add a column for distance to each roost to bat_occ_df 2: Fit a separate logistic regression for each roost based on the closest 0.05 degrees 3: generate predictions for each roost regression 4: add together the predictions for each roost regression 5: subtract the total roost regression from the bats 6: plot out the result
# 1: add a column for distance to each roost to bat_occ_df
add_dist_col <- function(d, roost_num){
x_center = bat_roosts$xcoord[roost_num]
y_center = bat_roosts$ycoord[roost_num]
d[paste0("dist", roost_num)] = sqrt((d$x - x_center)^2+(d$y - y_center)^2)
return(d)
}
for (i in 1:nrow(bat_roosts)){
bat_occ_df <- add_dist_col(bat_occ_df, i)
}
# 2: Fit a separate logistic regression for each roost based on the closest 0.05 degrees
# and
# 3: generate predictions for each roost regression
logistic_pred <- function(d, roost_num, r){
d['dist'] <- d[paste0("dist", roost_num)]
save <- d
d <- d[d['dist']<r,]
max_bats = max(d$bats)
fit <- glm(bats/max_bats ~ dist, family = "binomial", data = d)
plot <- ggplot(d) +
geom_point(aes(x=dist, y=bats)) +
geom_line(aes(x=dist, y=fit$fitted.values*max_bats), color = "red") +
ggtitle(paste("logistic", bat_roosts$xcoord[roost_num], bat_roosts$ycoord[roost_num]))
print(plot)
save[paste0("logistic", roost_num)] <- predict(fit, save)*max_bats
return(save)
}
exponential_pred <- function(d, roost_num, r){
d['dist'] <- d[paste0("dist", roost_num)]
save <- d
d <- d[d['dist']<r,]
fit <- lm(log(bats) ~ log(dist), data = d)
plot <- ggplot(d) +
geom_point(aes(x=dist, y=bats)) +
geom_line(aes(x=dist, y=exp(fit$fitted.values)), color = "red") +
ggtitle(paste("exponential", bat_roosts$xcoord[roost_num], bat_roosts$ycoord[roost_num]))
print(plot)
save[paste0("exponential", roost_num)] <- exp(predict(fit, save))
return(save)
}
for (i in 1:nrow(bat_roosts)){
bat_occ_df <- logistic_pred(d=bat_occ_df, roost_num=i, r=0.05)
}
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
for (i in 1:nrow(bat_roosts)){
bat_occ_df <- exponential_pred(d=bat_occ_df, roost_num=i, r=0.05)
}
# 4: add together the predictions for each roost regression
# and
# 5: subtract the total roost regression from the bats
corrected_pred <- function(d, func="logistic"){
d[func] <- 0
for (i in 1:nrow(bat_roosts)){
d[func] <- d[func] + d[paste0(func, i)]
}
d[paste0(func, "_pred")] <- d["bats"] - d[func]
return(d)
}
bat_occ_df <- corrected_pred(bat_occ_df, func="logistic")
bat_occ_df <- corrected_pred(bat_occ_df, func="exponential")
# 6: plot out the result
ggplot() +
geom_raster(data = bat_occ_df, aes(x=x, y=y, fill=bats)) +
geom_point(data = bat_roosts, aes(x=xcoord, y=ycoord)) +
ggtitle("original_bats")
ggplot() +
geom_raster(data = bat_occ_df, aes(x=x, y=y, fill=logistic)) +
geom_point(data = bat_roosts, aes(x=xcoord, y=ycoord)) +
ggtitle("logistic_predictions")
ggplot() +
geom_raster(data = bat_occ_df, aes(x=x, y=y, fill=logistic_pred)) +
geom_point(data = bat_roosts, aes(x=xcoord, y=ycoord)) +
ggtitle("logistic_corrected_bats")
ggplot() +
geom_raster(data = bat_occ_df, aes(x=x, y=y, fill=bats)) +
geom_point(data = bat_roosts, aes(x=xcoord, y=ycoord)) +
ggtitle("original_bats")
ggplot() +
geom_raster(data = bat_occ_df, aes(x=x, y=y, fill=exponential)) +
geom_point(data = bat_roosts, aes(x=xcoord, y=ycoord)) +
ggtitle("exponential_predictions")
ggplot() +
geom_raster(data = bat_occ_df, aes(x=x, y=y, fill=exponential_pred)) +
geom_point(data = bat_roosts, aes(x=xcoord, y=ycoord)) +
ggtitle("exponential_corrected_bats")
far_from_roosts <- filter(bat_occ_df,
dist1 > 0.01,
dist2 > 0.01,
dist3 > 0.01,
dist4 > 0.01,
dist5 > 0.01,
dist6 > 0.01,
dist7 > 0.01,
dist8 > 0.01)
ggplot() +
geom_raster(data = far_from_roosts, aes(x=x, y=y, fill=exponential_pred)) +
geom_point(data = bat_roosts, aes(x=xcoord, y=ycoord)) +
ggtitle("exponential_corrected_bats (0.01 degrees from roosts)")